This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(tidyverse)
# Load the GDP-per-capita table and preview its first ten rows.
gdp_df <- read_csv(file = "../data/gdp_per_capita.csv")
gdp_df |>
  head(n = 10)
NA
# Preview the last ten rows of the raw table.
gdp_df |>
  tail(n = 10)
# Remove the `Value Footnotes` column and show the result.
gdp_df_drop <- select(gdp_df, !`Value Footnotes`)
gdp_df_drop
NA
# Give the value and country columns shorter, syntactic names.
gdp_df_drop_rename <- gdp_df_drop |>
  rename(GDP_Per_Capita = Value, Country = `Country or Area`)
gdp_df_drop_rename
# Earliest year in the data. The column is referenced by name rather than by
# position so the result does not depend on column order.
min(gdp_df_drop_rename$Year)
[1] 1990
# Latest year in the data, referenced by column name rather than position.
max(gdp_df_drop_rename$Year)
[1] 2022
How many observations are there per year?
# Number of rows per year; count() stores the tally in column `n`.
no_obsv_year <- count(gdp_df_drop_rename, Year)
no_obsv_year
Make a plot to view the number of observations per year.
# Bar chart of the per-year observation counts.
ggplot(data = no_obsv_year, mapping = aes(x = Year, y = n)) +
  geom_col()
# Number of rows per country; count() stores the tally in column `n`.
no_obsv_ctry <- count(gdp_df_drop_rename, Country)
no_obsv_ctry
NA
# Unique non-missing country names. Missing values are filtered out as a
# separate step: subsetting the column inside distinct() produces a vector
# shorter than the table whenever an NA is present, and names the result
# after the whole expression instead of `Country`.
gdp_df_drop_rename |>
  filter(!is.na(Country)) |>
  distinct(Country)
# Unique country names with no NA filtering.
gdp_df_drop_rename |>
  distinct(Country)
NA
# How many distinct values the Country column holds.
gdp_df_drop_rename$Country |>
  unique() |>
  length()
[1] 242
Which countries have the fewest observations?
# Smallest per-country observation count.
no_obsv_ctry$n |>
  min()
[1] 10
# Countries with the fewest observations: keep only the rows whose count
# equals the overall minimum. (Adding the minimum as a constant column with
# mutate() would leave every country in the result.)
no_obsv_ctry |>
  filter(n == min(n))
NA
# Last three country names in sorted order.
no_obsv_ctry$Country |>
  sort() |>
  tail(n = 3)
[1] "World" "Zambia" "Zimbabwe"
# Cross-tabulate Country against observation count. The table is built from
# the whole `no_obsv_ctry` data frame, so its two columns become the table's
# dimensions.
table(no_obsv_ctry)
n
Country 10 12 14 15 16 19 20 22 23 24 25 26 28 29 30 31 32 33
Afghanistan 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
Africa Eastern and Southern 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Africa Western and Central 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Albania 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Algeria 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Angola 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Antigua and Barbuda 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Arab World 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Argentina 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Armenia 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Aruba 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
Australia 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Austria 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Azerbaijan 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Bahrain 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Bangladesh 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Barbados 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Belarus 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Belgium 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Belize 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Benin 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Bermuda 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Bhutan 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
Bolivia 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Bosnia and Herzegovina 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
Botswana 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Brazil 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Brunei 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Bulgaria 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Burkina Faso 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Burundi 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Côte d'Ivoire 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Cabo Verde 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Cambodia 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
Cameroon 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Canada 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Caribbean small states 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Cayman Islands 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
Central African Republic 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Central Europe and the Baltics 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Chad 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Chile 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
China 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Colombia 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Comoros 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Congo 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Costa Rica 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Croatia 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
Curaçao 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
Cyprus 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Czechia 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Dem. Rep. Congo 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Denmark 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Djibouti 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dominica 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[ reached 'max' / getOption("max.print") -- omitted 187 rows ]
# Observation count for every country as a named frequency table.
gdp_df_drop_rename$Country |>
  table()
Afghanistan Africa Eastern and Southern Africa Western and Central
20 33 33
Albania Algeria Angola
33 33 33
Antigua and Barbuda Arab World Argentina
33 33 33
Armenia Aruba Australia
33 32 33
Austria Azerbaijan Bahrain
33 33 33
Bangladesh Barbados Belarus
33 33 33
Belgium Belize Benin
33 33 33
Bermuda Bhutan Bolivia
33 32 33
Bosnia and Herzegovina Botswana Brazil
29 33 33
Brunei Bulgaria Burkina Faso
33 33 33
Burundi Côte d'Ivoire Cabo Verde
33 33 33
Cambodia Cameroon Canada
30 33 33
Caribbean small states Cayman Islands Central African Republic
33 16 33
Central Europe and the Baltics Chad Chile
33 33 33
China Colombia Comoros
33 33 33
Congo Costa Rica Croatia
33 33 28
Curaçao Cyprus Czechia
22 33 33
Dem. Rep. Congo Denmark Djibouti
33 33 10
Dominica Dominican Republic Early-demographic dividend
33 33 33
East Asia & Pacific East Asia & Pacific (excluding high income) East Asia & Pacific (IDA & IBRD)
33 33 33
Ecuador Egypt El Salvador
33 33 33
Equatorial Guinea Estonia Eswatini
33 28 33
Ethiopia Euro area Europe & Central Asia
33 33 33
Europe & Central Asia (excluding high income) Europe & Central Asia (IDA & IBRD) European Union
33 33 33
Fiji Finland Fragile and conflict affected situations
33 33 33
France Gabon Georgia
33 33 33
Germany Ghana Greece
33 33 33
Grenada Guatemala Guinea
33 33 33
Guinea-Bissau Guyana Haiti
33 33 33
Heavily indebted poor countries (HIPC) High income Honduras
33 33 33
Hong Kong SAR, China Hungary IBRD only
33 32 33
Iceland IDA & IBRD total IDA blend
28 33 33
IDA only IDA total India
33 33 33
Indonesia Iran Iraq
33 33 33
Ireland Israel Italy
33 28 33
Jamaica Japan Jordan
33 33 33
Kazakhstan Kenya Kiribati
33 33 33
Korea Kosovo Kuwait
33 15 31
Kyrgyz Republic Lao PDR Late-demographic dividend
33 33 33
Latin America & Caribbean Latin America & Caribbean (excluding high income) Latin America & Caribbean (IDA & IBRD)
33 33 33
Latvia Least developed countries: UN classification Lebanon
28 33 32
Lesotho Liberia Libya
33 23 24
Lithuania Low & middle income Low income
28 33 33
Lower middle income Luxembourg Macao SAR, China
33 33 33
Madagascar Malawi Malaysia
33 33 33
Maldives Mali Malta
28 33 33
Marshall Islands Mauritania Mauritius
33 33 33
Mexico Micronesia Middle East & North Africa
33 33 33
Middle East & North Africa (excluding high income) Middle East & North Africa (IDA & IBRD) Middle income
33 33 33
Moldova Mongolia Montenegro
28 33 26
Morocco Mozambique Myanmar
33 33 33
Namibia Nauru Nepal
33 19 33
Netherlands New Zealand Nicaragua
33 33 33
Niger Nigeria North America
33 33 33
North Macedonia Norway OECD members
33 33 33
Oman Other small states Pacific island small states
33 23 33
Pakistan Palau Panama
33 22 33
Papua New Guinea Paraguay Peru
33 33 33
Philippines Poland Portugal
33 33 33
Post-demographic dividend Pre-demographic dividend Puerto Rico
33 33 33
Qatar Romania Russia
23 33 33
Rwanda São Tomé and Principe Samoa
33 22 33
San Marino Saudi Arabia Senegal
25 33 33
Serbia Seychelles Sierra Leone
28 33 33
Singapore Sint Maarten (Dutch part) Slovak Republic
33 14 31
Slovenia Small states Solomon Islands
28 26 33
Somalia South Africa South Asia
10 33 33
South Asia (IDA & IBRD) Spain Sri Lanka
33 33 33
St. Kitts and Nevis St. Lucia St. Vincent and the Grenadines
33 33 33
Sub-Saharan Africa Sub-Saharan Africa (excluding high income) Sub-Saharan Africa (IDA & IBRD)
33 33 33
Sudan Suriname Sweden
33 33 33
Switzerland Türkiye Tajikistan
33 33 33
Tanzania Thailand The Bahamas
33 33 33
The Gambia Timor-Leste Togo
33 23 33
Tonga Trinidad and Tobago Tunisia
32 33 33
Turkmenistan Turks and Caicos Islands Tuvalu
31 12 33
Uganda Ukraine United Arab Emirates
33 33 33
United Kingdom United States Upper middle income
33 33 33
Uruguay Uzbekistan Vanuatu
33 33 33
Viet Nam West Bank and Gaza World
33 29 33
Zambia Zimbabwe
33 33
# Per-country observation counts, largest first, in `num_obsv_of_ctry`.
num_obsv_ctry <- count(
  gdp_df_drop_rename,
  Country,
  name = "num_obsv_of_ctry",
  sort = TRUE
)
# The bottom of the sorted table holds the smallest counts.
num_obsv_ctry |>
  tail()
NA
# Five countries with the smallest observation counts (ascending order).
lowest_num_obsrv_ctry <- arrange(num_obsv_ctry, num_obsv_of_ctry) |>
  head(n = 5)
lowest_num_obsrv_ctry
# Smallest observation count overall.
num_obsv_ctry$num_obsv_of_ctry |>
  min(na.rm = TRUE)
[1] 10
# Rows for 2021 only. Year is compared with a number rather than the string
# "2021", so the filter does not rely on implicit type coercion and matches
# the numeric comparison used for the continent subset later in the notebook.
gdp_2021 <- gdp_df_drop_rename |>
  filter(Year == 2021)
gdp_2021
NA
# Sanity check: the subset should contain a single year.
gdp_2021 |>
  count(Year)
# Smallest year in the subset, referenced by column name rather than position.
min(gdp_2021$Year)
[1] 2021
# Largest year in the subset, referenced by column name rather than position.
max(gdp_2021$Year)
[1] 2021
# Summary statistics for 2021 GDP per capita.
gdp_2021$GDP_Per_Capita |>
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
714.1 5044.2 13754.8 20711.6 30974.4 115683.5
# Column-by-column summary of the 2021 subset.
gdp_2021 |>
  summary()
Country Year GDP_Per_Capita
Length:241 Min. :2021 Min. : 714.1
Class :character 1st Qu.:2021 1st Qu.: 5044.2
Mode :character Median :2021 Median : 13754.8
Mean :2021 Mean : 20711.6
3rd Qu.:2021 3rd Qu.: 30974.4
Max. :2021 Max. :115683.5
# Histogram (50 bins) of GDP per capita across the 2021 rows.
gdp_2021 |>
  ggplot(mapping = aes(x = GDP_Per_Capita)) +
  geom_histogram(fill = "skyblue4", color = "white", bins = 50) +
  labs(
    y = "Frequency or Number of Countries",
    x = "GDP per Capita",
    title = "GDP Per Capita in 2021"
  )
# Order the 2021 rows from lowest to highest GDP per capita. arrange() has no
# `sort` argument (a `sort = TRUE` there is just treated as an extra, constant
# sort key), so only the real sort column is passed.
num_obsv_ctry_2021 <- gdp_2021 |>
  arrange(GDP_Per_Capita)
num_obsv_ctry_2021
head(num_obsv_ctry_2021)
tail(num_obsv_ctry_2021)
# Five rows with the lowest GDP per capita in 2021.
lowest_gdp_2021 <- num_obsv_ctry_2021 |>
  slice(1:5)
lowest_gdp_2021
NA
# Last five rows of the GDP-ordered 2021 table.
highest_gdp_2021 <- tail(num_obsv_ctry_2021, n = 5)
highest_gdp_2021
NA
# Keep only the 1990 and 2021 rows and spread them into one column per year.
# `%in%` tests every Year against the whole set; `==` with a length-2 vector
# recycles that vector along the rows and silently drops matching rows that
# fall on the "wrong" position.
interim <- gdp_df_drop_rename |>
  filter(Year %in% c(1990, 2021)) |>
  pivot_wider(
    names_from = Year,
    values_from = GDP_Per_Capita
  )
# Keep only the rows that have a value in every column (both years present).
gdp_pivoted <- interim[complete.cases(interim), ]
gdp_pivoted
NA
NA
# Percent change in GDP per capita from 1990 to 2021, rounded to 4 decimals.
gdp_pivoted <- gdp_pivoted |>
  mutate(Percent_Change = round((`2021` - `1990`) / `1990` * 100, 4))
# gdp_pivoted$Percent_Change <- (((gdp_pivoted$`2021` - gdp_pivoted$`1990`) / gdp_pivoted$`1990` )* 100)
gdp_pivoted
# Rows with a negative percent change. The column is referenced by name
# instead of by position 4, so the filter survives any column reordering.
gdp_pivoted |>
  filter(Percent_Change < 0)
NA
# Row with the single largest percent change.
gdp_pivoted |>
  slice(which.max(Percent_Change))
# Five rows with the largest percent change, biggest first.
arrange(gdp_pivoted, desc(Percent_Change)) |>
  head(n = 5)
NA
Create a line plot showing these countries’ GDP per capita for all years for which you have data.
# All rows for Guyana, drawn as a connected line with point markers.
gdp_guyana <- filter(gdp_df_drop_rename, Country == "Guyana")
gdp_guyana |>
  ggplot(mapping = aes(x = Year, y = GDP_Per_Capita, group = 1)) +
  geom_line() +
  geom_point()
gdp_guyana
NA
# All rows for Albania, drawn as a connected line with point markers.
gdp_albania <- filter(gdp_df_drop_rename, Country == "Albania")
gdp_albania |>
  ggplot(mapping = aes(x = Year, y = GDP_Per_Capita, group = 1)) +
  geom_line() +
  geom_point()
gdp_albania
NA
Put both line charts on the same plot.
gdp_albania
gdp_guyana
# Base-graphics version: Albania as a solid line on a fixed 0-35000 y-axis.
with(
  gdp_albania,
  plot(
    Year, GDP_Per_Capita,
    type = "l",
    ylim = c(0, 35000),
    main = "GDP Per Capita Albania and Guyana 1990-2021",
    xlab = "Years",
    ylab = "GDP Per Capita"
  )
)
# Overlay Guyana as a dashed line on the same axes.
with(gdp_guyana, lines(Year, GDP_Per_Capita, lty = "dashed"))
# Legend entries are listed in the same order as the line types below.
labels <- c("Albania", "Guyana")
legend(
  "topleft",
  legend = labels,
  lty = c(1, 2),
  pch = NULL,
  inset = 0.03,
  cex = 0.6
)
# ggplot version with both countries on one set of axes: Albania in red,
# Guyana in dark green. Each Guyana layer gets its own `data =` argument and
# is added to the plot as a separate layer. Previously geom_line() sat inside
# geom_point()'s argument list, where it was added to aes() instead of to the
# plot, and it had no Guyana data.
ggplot(data = gdp_albania, aes(x = Year, y = GDP_Per_Capita, group = 1)) +
  geom_line(color = "red") +
  geom_point(color = "red") +
  geom_line(data = gdp_guyana, color = "darkgreen") +
  geom_point(data = gdp_guyana, color = "darkgreen") +
  labs(x = "Year", y = "GDP_Per_capita")
# Load the country-to-continent lookup table.
continents <- read_csv(file = "../data/continents.csv")
continents
# head(continents, 10)
# Attach the continent to every GDP row; merge() keeps only countries that
# appear in both tables. Note that this reassigns `gdp_df`.
gdp_df <- merge(x = gdp_df_drop_rename, y = continents, by = "Country")
gdp_df
# Number of distinct countries on each continent.
num_continents <- summarise(
  group_by(gdp_df, Continent),
  Num_Countries_per_Continent = n_distinct(Country)
)
num_continents
NA
# 2021 rows of the continent-tagged GDP table.
gdp_df_2021_continents <- filter(gdp_df, Year == 2021)
gdp_df_2021_continents
# One fill colour per box.
my_colors <- c("#FFA500", "#008000", "#1E90FF", "#FF1493", "red", "purple")
# Box plot of 2021 GDP per capita grouped by continent.
boxplot(
  GDP_Per_Capita ~ Continent,
  data = gdp_df_2021_continents,
  main = "GDP per Capita in 2021 by Continent",
  xlab = "Continents",
  ylab = "GDP Per Capita",
  col = my_colors,
  border = "black",
  notch = FALSE,
  notchwidth = 0.5,
  medcol = "white",
  whiskcol = "black",
  boxwex = 0.5,
  outpch = 19,
  outcol = "black"
)
# text(seq_along(unique(gdp_df_2021_continents$Continent)), par("usr")[3],
# labels = seq_along(unique(gdp_df_2021_continents$Continent)), srt = 45, adj = c(1.1, 1.1), xpd = TRUE)
#
# boxplot(GDP_Per_Capita ~ Continent, data = gdp_df_2021_continents, pars = list(xaxt = "n"))
# axis(2, at=gdp_df_2021_continents$Continent, labels = TRUE)
# text(y = seq(0, 100, by=20), par("usr")[1], labels = lablist.y, srt = 45, pos = 2, xpd = TRUE)
# # axis(1, at=c(1,2), labels = FALSE)
# text(c(1:6), par("usr")[3] - 1, labels = gdp_df_2021_continents$Continent, srt = 45, pos = 1, xpd = TRUE)
# text(y = seq(1, 6, by=20), par("usr")[1], labels = gdp_df_2021_continents$Continents, srt = 45, pos = 1, xpd = TRUE)
# Load the life-expectancy table, skipping the first four lines of the file.
life_expectancy <- read_csv(file = "../data/life_expectancy.csv", skip = 4)
life_expectancy
NA
# Give the metadata columns syntactic names.
life_expectancy_rename <- life_expectancy |>
  rename(
    Country_Code = `Country Code`,
    Indicator_Name = `Indicator Name`,
    Indicator_Code = `Indicator Code`
  )
# Columns to drop before reshaping: the metadata columns, "2023", and "...69".
drop.cols <- c("Country_Code", "Indicator_Name", "Indicator_Code", "2023", "...69")
# any_of() replaces the superseded one_of(); it drops whichever of the listed
# columns exist and ignores any that are absent.
life_expectancy_dropped <- life_expectancy_rename |>
  select(-any_of(drop.cols))
life_expectancy_dropped
NA
# Reshape from one column per year to one row per country-year.
# pivot_longer() takes the year values from the column names, which are
# strings; names_transform converts them to integers so Year is numeric here
# just as it is in the GDP table (consistent joins, continuous plot axes).
country_life_expectancy <- life_expectancy_dropped |>
  pivot_longer(
    !`Country Name`,
    names_to = "Year",
    names_transform = list(Year = as.integer),
    values_to = "Life_Expectancy"
  ) |>
  rename(Country = `Country Name`)
country_life_expectancy
NA
# Spot-check the reshaped table on a single country.
filter(country_life_expectancy, Country == "Zimbabwe")
NA
# Ten earliest country-year rows with a life expectancy of at least 80.
filter(country_life_expectancy, Life_Expectancy >= 80) |>
  arrange(Year) |>
  head(n = 10)
# |>
# group_by(Country) |>
# slice(1:10) |>
# ungroup() |>
# arrange(Year)
# country_life_expectancy |>
# filter(as.numeric(as.character(Life_Expectancy)) >= 80) |>
# arrange(Year)
# Combine GDP and life expectancy; merge() keeps only the Country/Year pairs
# present in both tables.
gdp_le <- merge(
  x = gdp_df,
  y = country_life_expectancy,
  by = c("Country", "Year")
)
gdp_le |>
  head()
gdp_le |>
  tail()
NA
# Spot-check the merged table on a single country.
filter(gdp_le, Country == "United Kingdom")
NA
# 2021 rows of the merged GDP / life-expectancy table. Year is compared with
# a number rather than the string "2021", matching the numeric comparison
# used for the continent subset earlier in the notebook.
gdp_le_2021 <- gdp_le |>
  filter(Year == 2021)
gdp_le_2021
NA
How many countries have a life expectancy of at least 80 in 2021?
# 2021 rows with a life expectancy of at least 80.
filter(gdp_le_2021, Life_Expectancy >= 80)
NA
NA
# Count of 2021 rows with a life expectancy of at least 80; rows with a
# missing life expectancy are not counted.
sum(gdp_le_2021$Life_Expectancy >= 80, na.rm = TRUE)
[1] 31
# Three rows with the highest GDP per capita in 2021.
arrange(gdp_le_2021, desc(GDP_Per_Capita)) |>
  head(n = 3)
NA
Create a plot showing the change in life expectancy over time for these three countries. This plot should be faceted so that each country is contained in its own figure.
# Life-expectancy rows for the three selected countries. `%in%` keeps every
# row whose Country is in the set; `==` with a length-3 vector recycles that
# vector along the rows and keeps only the rows that happen to line up with
# the matching name.
life_exp_facet <- country_life_expectancy |>
  filter(Country %in% c("Luxembourg", "Singapore", "Ireland"))
#
# ggplot(life_exp_facet, aes(x = Year, y = Life_Expectancy, color = Country)) +
# geom_line() +
# labs(title = "Life Expectancy over time",
# y = "Life Expectancy",
# color = "Country") +
# facet_grid(. ~ Country)
# ggplot(data = life_exp_facet, aes(x = Year, y = Life_Expectancy)) +
# geom_line(color = "steelblue") +
# labs(title = "Life Expectancy Over Time",
# x = "Year",
# y = "Life Expectancy") +
# facet_wrap(~ Country, ncol = 3) +
# theme_bw()
# Life expectancy by year, one panel (column) per country.
ggplot(data = life_exp_facet, mapping = aes(x = Year, y = Life_Expectancy)) +
  geom_point() +
  facet_grid(cols = vars(Country))
# Scatter plot of 2021 GDP per capita against life expectancy.
input <- gdp_le_2021[, c("Life_Expectancy", "GDP_Per_Capita")]
# The title now names the plotted variables; the earlier "Weight vs Milage"
# title did not describe this data.
plot(x = input$Life_Expectancy, y = input$GDP_Per_Capita,
  xlab = "Life Expectancy",
  ylab = "GDP Per Capita",
  main = "GDP Per Capita vs Life Expectancy (2021)"
)
# ggplot(gdp_le_2021, aes(x = "Life_Expectancy", y = "GDP_Per_Capita")) +
# geom_point(aes(color = factor(Continent)))
# Drop rows missing either variable, then compute the Pearson correlation
# between life expectancy and GDP per capita for 2021.
gdp_le_2021_dropped <- gdp_le_2021 |>
  filter(!(is.na(Life_Expectancy) | is.na(GDP_Per_Capita)))
with(
  gdp_le_2021_dropped,
  cor(Life_Expectancy, GDP_Per_Capita, method = "pearson")
)
[1] 0.7450812
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.